As mentioned in the EDA tab, we will drill down into the dataset along two main dimensions: RCP and scenario. For each RCP value (4.5, 8.5), I will conduct the following four types of analysis to compare and contrast the important variables that separate the scenarios and affect the annual temperature.
Methodology
Scatterplot
PCA
Pearson Correlation
RMSE
Import module / Set options and theme
# Notebook setup: import every library used by the analysis cells, then
# configure warnings and pandas display options.
import warnings
import xml.etree.ElementTree as ET

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pingouin as pg
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
from plotly.subplots import make_subplots
from scipy.stats import ttest_rel, zscore
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.preprocessing import StandardScaler
from statsmodels.stats.weightstats import ttest_ind

# Silence all warnings so they do not clutter the rendered notebook output.
warnings.filterwarnings("ignore")

# Do not truncate wide frames, and print floats with 10 digits of precision.
pd.set_option('display.max_columns', None)
pd.set_option('display.precision', 10)
With a basic scatterplot, we can see how each numerical variable correlates with either the annual temperature or the annual precipitation. Since RCP 8.5 and RCP 4.5 have different predictions, two plots are used, one per RCP scenario.
First, without any additional feature, we can see that the more precipitation there is, the lower the annual temperature, because we can easily draw a line with a negative slope through the scattered points.
4.5 vs 8.5 scatterplot
# Scatter of annual precipitation (x) against annual temperature (y), drawn as
# one subplot per RCP value. A dropdown switches which column colors the
# markers; each (column, RCP) pair is its own trace and only the traces of the
# selected column are visible.
#
# Expects `df_con` and `df_orig` to be DataFrames that are already loaded.
# NOTE(review): the upper bound of the column slice is taken from `df_orig`
# while the slice is applied to `df_con` -- confirm both frames share the same
# column layout.
test = df_con.iloc[
    :, list(range(1, 3)) + [4, 6] + list(range(8, len(df_orig.columns) - 1))
]
color_columns = list(test.columns)
rcp_values = test['RCP'].unique()
subplot_titles = [f'RCP {rcp}' for rcp in rcp_values]

# Only rows whose year is in 2060..2099 are plotted. Build the filtered frame
# once per RCP instead of re-evaluating the same mask for every trace field.
in_period = test['year'].isin(range(2060, 2100))
rcp_subsets = [test[in_period & (test['RCP'] == rcp)] for rcp in rcp_values]

# Create figure with one subplot per RCP value
fig = make_subplots(
    rows=1,
    cols=len(rcp_values),
    shared_yaxes=True,
    subplot_titles=subplot_titles,
    horizontal_spacing=0.15,
)

# Add a scatter trace for each color column and each RCP value. The nesting
# order (column outer, RCP inner) must match the order of the 'visible' flags
# built for the dropdown buttons below.
for column_idx, color_col in enumerate(color_columns):
    for subplot_idx, (rcp, subset) in enumerate(zip(rcp_values, rcp_subsets)):
        color_values = subset[color_col]

        # Five evenly spaced colorbar ticks between the column's min and max.
        # Series.min()/max() skip NaN, unlike the builtin min()/max().
        colorbar_ticks = [
            round(tick, 2)
            for tick in np.linspace(
                start=round(color_values.min(), 2),
                stop=round(color_values.max(), 2),
                num=5,
            )
        ]

        fig.add_trace(
            go.Scatter(
                x=subset['PPT_Annual'],
                y=subset['T_Annual'],
                mode='markers',
                marker=dict(
                    color=color_values,
                    colorbar=dict(
                        tickmode='array',
                        tickvals=colorbar_ticks,
                        ticktext=colorbar_ticks,
                        y=0.5,
                        # Horizontal colorbar position, shifted per subplot.
                        # NOTE(review): 0.43 / 0.58 look tuned for exactly two
                        # subplots -- revisit if more RCP values are added.
                        x=0.43 + (subplot_idx * 0.58),
                    ),
                    colorscale='rdpu',
                ),
                name=color_col,
                # Only the traces of the first color column are shown initially
                visible=(column_idx == 0),
                hovertemplate=(
                    f"<b>{color_col}</b><br>"
                    "Precipitation: %{x}<br>"
                    "Temperature: %{y}<br>"
                    "RCP: " + str(rcp) + "<br>"
                    "Value: %{marker.color}<br>"
                    "<extra></extra>"  # This hides the secondary box with trace info
                ),
            ),
            row=1,
            col=subplot_idx + 1,
        )

# Updating the layout to add the title
fig.update_layout(
    title={
        'text': '<b>Annual Precipitation vs Temperature by RCP Scenarios</b>',
        'x': 0.5,
        'y': 0.97,
        'xanchor': 'center',
    },
    showlegend=False,  # Hide legend since we are using colorbars
)

# Adding dropdown filter to change the visible traces: one button per color
# column, which shows that column's traces (one per RCP) and hides the rest.
# NOTE(review): the 'marker' entry sits in the layout-update dict of the
# button args; it does not look like a layout attribute, so it is probably
# ineffective -- kept unchanged, confirm before removing.
dropdown_buttons = [
    {
        'label': col,
        'method': 'update',
        'args': [
            {
                'visible': [
                    col == color_column
                    for color_column in color_columns
                    for _ in rcp_values
                ]
            },
            {
                'title': {
                    'text': f'<b>Annual Precipitation vs Temperature by {col}</b>',
                    'x': 0.5,
                    'y': 0.97,
                },
                'marker': {'colorbar': {'title': 'Scale'}},
            },
        ],
    }
    for col in color_columns
]

fig.update_layout(
    updatemenus=[
        {
            'buttons': dropdown_buttons,
            'direction': 'down',
            'showactive': True,
            'x': 0.5,
            'xanchor': 'center',
            'y': 1.23,
            'yanchor': 'top',
        }
    ]
)

# Label the x-axis of every subplot (not just columns 1 and 2); the y-axis is
# shared, so only the first subplot gets a y-axis title.
for subplot_col in range(1, len(rcp_values) + 1):
    fig.update_xaxes(title_text="Annual Precipitation", row=1, col=subplot_col)
fig.update_yaxes(title_text="Annual Temperature", row=1, col=1)

# Restyle the annotations present in the layout (the subplot titles are
# passed to make_subplots above).
for annotation in fig['layout']['annotations']:
    annotation['font'] = {'size': 12, 'color': 'black'}

# Show the figure
fig.show()